#data(field)
#dim(field)
# head(field)
data(veggyraw)
data(veggy)
Example image of germinating plants and red flags of those unsuccessful pots
## check duplicated image pots
veggyraw=mutate(veggyraw, indexrep=paste(site,qp,pos,day,sep='_'))
## Number of pots used for replicability analysis
dim(veggyraw[duplicated(veggyraw$indexrep),]) # There are 790 in total from both experiments
## [1] 790 13
unique(veggyraw[duplicated(veggyraw$indexrep),]$day)
## [1] "2015-11-16" "2015-11-09" "2015-12-04" "2015-11-13" "2015-12-09"
## [6] "2016-02-07" "2016-02-09" "2015-11-23" "2016-01-30" "2015-10-30"
## [11] "2015-11-27"
unique(veggyraw[duplicated(veggyraw$indexrep),]$qp)
## [1] 105 109 141 164 168 181 190 193 201 231 232 233 249 257 260 31 330
## [18] 33 45 50 67
## Run the test for replicability
library(MCMCglmm)
## Loading required package: Matrix
## Loading required package: coda
## Loading required package: ape
lmm=MCMCglmm(data=veggyraw[duplicated(veggyraw$indexrep),], countgreen ~1, random = ~ indexrep, family = 'poisson',verbose=F)
print ( h2MCMC(lmm,randomname = 'indexrep') )
## $Mode
## var1
## 0.9974931
##
## $HPD
## lower upper
## var1 0.9957086 0.9984387
## attr(,"Probability")
## [1] 0.95
## Produce cleaner data than veggyraw
## Since there are duplicates but the replicability is >99%, generate average of
## counts for two pots of the same identity (can be due to two pictures per
## tray/day). Necessary for merge with master dataset later
# veggy %>% mutate( indexrep=paste(site,qp,pos,day,sep='_')) %>%
# group_by(indexrep) %>% summarise(site=unique(site),
# qp=unique(qp),
# pos=unique(pos),
# trayid=unique(trayid),
# rep=unique(rep),
# indpop=unique(indpop),
# water=unique(water),
# id=unique(id),
# day=unique(day),
# countgreen=mean(countgreen),
# countred=mean(countred)
# ) ->veggy
# veggy %>% head()
# veggy %>% tail()
# dim(veggy)
#
# veggy= veggy %>% mutate( potindex=paste(site,qp,pos,id,sep="_")) %>% select(-indexrep)
#
# devtools::use_data(veggy,overwrite = T) # Save the data for the package
data(veggy)
dim(veggy)
This is not run because takes some time, instead I load the data already produced
veggy= veggy %>% mutate( potindex=paste(site,qp,pos,id,sep="_")) %>% mutate(site_water=paste(site,water,sep="_"))
veggy %>% filter(indpop=='i') %>%
ggplot() + geom_line(aes(y=countgreen,x=day,group=potindex, color=factor(site_water)),alpha=0.1 ) +
labs(
y = "# green pixels",
colour = ""
)#+ theme(legend.position="none")
veggy %>% filter(indpop=='p') %>%
ggplot() +
# geom_line(aes(y=countgreen,x=day,group=potindex, color=factor(site_water)),alpha=0.1 ) +
geom_line(aes(y=countgreen,x=day,group=potindex, color=factor(site_water)),alpha=0.05 ) +
labs(
y = "# green pixels",
colour = ""
)#+ theme(legend.position="none")
p1<-
veggy %>% filter(indpop=='p',site=='madrid') %>%
ggplot() +
geom_line(aes(y=countgreen,x=day,group=potindex, color=factor(water)),alpha=0.05 ) +
scale_color_manual(values = c("blue", "red"))+
labs(
y = "# green pixels",
colour = "Watering"
)+ guides(colour = guide_legend(override.aes = list(alpha = 1)))
p2<-
veggy %>% filter(indpop=='p',site=='tuebingen') %>%
ggplot() +
geom_line(aes(y=countgreen,x=day,group=potindex, color=factor(water)),alpha=0.05 ) +
scale_color_manual(values = c("blue", "red"))+
labs(
y = "# green pixels",
colour = "Watering"
)+ guides(colour = guide_legend(override.aes = list(alpha = 1)))
panel<-plot_grid(p1,p2)
save_plot(filename="../figs/Figure_green_trajectory.pdf",plot = panel, base_width = 10,base_height = 3.8)
veggy %>%
ggplot() + geom_line(aes(y=countred,x=day,group=potindex, color=factor(site_water)),alpha=0.1 ) #+ theme(legend.position="none")
p<-ggplot(data=veggy,aes(x=log10(countred+1)),fill='black' )+geom_histogram() + labs(x='log 10 (# red pixels +1)')
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
save_plot(filename="../figs/Figure_redcount_histogram.pdf",plot = p, base_width = 5,base_height = 4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
veggy %>% filter(trayid == "27_i_l" ,pos=="a3", site=="madrid") %>%
ggplot(.) + geom_point(aes(y=countgreen,x=day),color="green") +
geom_point(aes(y=countred,x=day),color="red")
This is an example of a fail pot, where green pixels are low all the time but a red flag was put to identify them
veggy %>% filter(trayid == "27_i_l" ,pos=="a4", site=="madrid") %>%
ggplot(.) + geom_point(aes(y=countgreen,x=day),color="green") +
geom_point(aes(y=countred,x=day),color="red")
The decrease in January of some is due to the thinning of plants for
veggy %>% filter(trayid == "9_p_l" ,pos=="e7", site=="tuebingen") %>%
ggplot(.) + geom_point(aes(y=countgreen,x=day),color="green") +
geom_point(aes(y=countred,x=day),color="red")
This plot is a populatino replicate therefore there is no sudden decay, only that due to mortality at the end of the experiment
# just get the total number of red points
red=veggy %>% group_by(site,qp,pos,trayid,rep,indpop,water,id) %>% summarize(redsum=sum(countred)) %>% mutate(identifier=paste(sep="_",site,qp,pos))
# calculate variance across groups by establishing different threshold values
fvals<-sapply(seq(1e4,1e6,by=1000),function(x){
summary(aov(red$redsum ~ red$redsum >x))[[1]][["F value"]][1]
})
plot(fvals ~ seq(1e4,1e6,by=1000))
foundthreshold= seq(1e4,1e6,by=1000)[which(fvals==max(fvals,na.rm=T))] # =152000
print(foundthreshold)
## [1] 152000
# generate a bad flag column
table(red$redsum > foundthreshold)
##
## FALSE TRUE
## 22780 1967
badflags = red %>% filter(redsum > foundthreshold) %>% select(identifier)
## Adding missing grouping variables: `site`, `qp`, `pos`, `trayid`, `rep`, `indpop`, `water`
badflags=unique(badflags$identifier)
length(badflags)
## [1] 1967
By calculating F statistic, we calculate the variance between two groups of pots whose pixels are counted. For that several thresholds are tried, the one that separates better the two distribution is the one that will be used
Just to have the number of green counts:
# get total number of green counts
green=veggy %>% group_by(site,qp,pos,trayid,rep,indpop,water,id) %>%
summarize(greensum=sum(countgreen)) %>%
mutate(identifier=paste(sep="_",site,qp,pos))
badflagsgreen=green %>% filter(greensum < 1e4) %>% select(identifier) # just in case it is needed later
## Adding missing grouping variables: `site`, `qp`, `pos`, `trayid`, `rep`, `indpop`, `water`
green=green%>%select(-identifier)
Model all trajectories as sigmoidal functions:
veg<-
veggy %>%
# head(1e4) %>% # for profiling
# a couple of filters
mutate(identifier=paste(sep="_",site,qp,pos)) %>% # get an indentifier variable
filter( ! identifier %in% badflags) %>% # remove those pots with bad identifiers
# filter( ! identifier %in% badflagsgreen) %>% # filter if there is not enough green
# select early positions
mutate(starting=startday(site)) %>% # add the start day of the experiment ina per row basis for later calculations
mutate(daycount= fn(day - as.Date(starting) ) )%>% # trick to filter the 40 first dates depending on experiment
filter(daycount <60) %>% # 40 days because we started the thinning like 1 month after they started germination, probably would be better to do it in a per pot basis.
group_by(site,qp,pos,trayid,rep,indpop,water,id) %>% # group observations by pot to analyse each time series
# calculate several trajectory informations
summarize(
ger.a=fitsigmoid(countgreen,daycount,parameter='a'),
ger.b=fitsigmoid(countgreen,daycount,parameter='b'),
ger.c=fitsigmoid(countgreen,daycount,parameter='c'),
firstgreen=firstgreen(y=countgreen,x=daycount), # first green pixels is kind of arbitrary
lin.0=fitlinear(countgreen,daycount,'significance') # Probably the regression one is not so nice.
)
## Get the
# merge with total counts of red and green
veg=veg %>%
full_join(.,red,by=c('site','qp','pos','trayid','rep','indpop','water','id')) %>%
full_join(.,green,by=c('site','qp','pos','trayid','rep','indpop','water','id'))
devtools::use_data(veg,overwrite = T)
data(veg)
Plotting the sigmoidal curves of 1000 pots
vegh<-head(veg,1000) %>% filter(ger.a != "NA")
plot( ylab='Predicted # pixels',xlab='Days of experiment',
ylim=c(0,50000),
y=
getsigmoid(
a= substrRight(vegh$ger.a,lastpos = 4,giveright = F)[2],
b= substrRight(vegh$ger.b,lastpos = 4,giveright = F)[2],
c= substrRight(vegh$ger.c,lastpos = 4,giveright = F)[2]
),
x=1:40,
type='l'
)
for( i in 1:nrow(vegh)){
lines( y=
getsigmoid(
a= substrRight(vegh$ger.a,lastpos = 4,giveright = F)[i],
b= substrRight(vegh$ger.b,lastpos = 4,giveright = F)[i],
c= substrRight(vegh$ger.c,lastpos = 4,giveright = F)[i]
),
x=1:40,
col=transparent('black')
)
}
<<<<<<< HEAD
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.5 LTS
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=de_DE.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] MCMCglmm_2.23 ape_3.5 coda_0.18-1
## [4] Matrix_1.2-7.1 field_0.0.0.9000 moiR_0.0.1
## [7] RColorBrewer_1.1-2 devtools_1.12.0 cowplot_0.7.0
## [10] ggplot2_2.2.1 dplyr_0.5.0 knitr_1.16
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.10 plyr_1.8.4 tools_3.3.2
## [4] digest_0.6.12 evaluate_0.10 memoise_1.1.0
## [7] tibble_1.2 gtable_0.2.0 nlme_3.1-129
## [10] lattice_0.20-34 DBI_0.6-1 commonmark_0.9
## [13] yaml_2.1.14 withr_1.0.2 stringr_1.2.0
## [16] roxygen2_5.0.1.9000 xml2_1.0.0 rprojroot_1.2
## [19] grid_3.3.2 R6_2.2.0 rmarkdown_1.5.9000
## [22] tensorA_0.36 corpcor_1.6.8 tidyr_0.6.1
## [25] magrittr_1.5 backports_1.0.5 scales_0.4.1
## [28] htmltools_0.3.6 assertthat_0.1 cubature_1.1-2
## [31] colorspace_1.2-7 labeling_0.3 stringi_1.1.3
## [34] lazyeval_0.2.0 munsell_0.4.3
=======
library(lme4)
library(MCMCglmm)
veg %>%
filter(redsum < 1e4) %>%
# filter(greensum >1e4) %>%
mutate(
a= removetail(ger.a,4),
b= removetail(ger.b,4),
c= removetail(ger.c,4)
) %>%
lmer(formula = a ~ (1|id) + (1|water)+ (1|site) + (1|indpop) ) -> lmod
lmod %>% randomvariance(.,var =c('id','water','site',"indpop") )
## [,1]
## [1,] 0.003424558
## [2,] 0.002360285
## [3,] 0.000000000
## [4,] 0.048074078
veg %>%
filter(redsum < 1e4) %>%
# filter(greensum >1e4) %>%
mutate(
a= removetail(ger.a,4),
b= removetail(ger.b,4),
c= removetail(ger.c,4)
) %>%
lmer(formula = b ~ (1|id) + (1|water)+ (1|site) + (1|indpop) ) -> lmod
lmod %>% randomvariance(.,var =c('id','water','site',"indpop") )
## [,1]
## [1,] 0.0008525735
## [2,] 0.0017037188
## [3,] 0.0398655522
## [4,] 0.3056023807
veg %>%
filter(redsum < 1e4) %>%
# filter(greensum >1e4) %>%
mutate(
a= removetail(ger.a,4),
b= removetail(ger.b,4),
c= removetail(ger.c,4)
) %>%
lmer(formula = c ~ (1|id) + (1|water)+ (1|site) + (1|indpop) ) -> lmod
lmod %>% randomvariance(.,var =c('id','water','site',"indpop") )
## [,1]
## [1,] 0.001048828
## [2,] 0.003458954
## [3,] 0.074997945
## [4,] 0.336709837
Not very satisfactory. Most of the effect is about individual versus population replicates
veg %>%
filter(redsum < 1e4) %>%
# filter(greensum >1e4) %>%
mutate(
a= removetail(ger.a,4),
b= removetail(ger.b,4),
c= removetail(ger.c,4),
l=removetail(lin.0)
) %>%
filter(water=='h', indpop=='i', site=='madrid') %>%
# lmer(formula = b ~ (1|id) ) -> lmod
# lmer(formula = a ~ (1|id) ) -> lmod
# lmer(formula = c ~ (1|id) ) -> lmod
# lmer(formula = greensum ~ (1|id) ) -> lmod
# lmer(formula = redsum ~ (1|id) ) -> lmod
lmer(formula = l ~ (1|id) ) -> lmod
## Warning in fn(substrRight(x, lastpos = 8, giveright = F)): NAs introduced
## by coercion
lmod %>% randomvariance(.,var =c('id') )
## [,1]
## [1,] 2.533265e-14